Load in necessary packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(mapview)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
library(censusapi)
##
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
##
## getFunction
library(leaflet)
library(lehdr)
library(fs)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
The data used to assess the % of workers deemed essential was determined using the Labor Market Information (LMI) Institute’s Standard Occupation Classification (SOC) codes connectd to essential businesses. (https://www.lmiontheweb.org/more-than-half-of-u-s-workers-in-critical-occupations-in-the-fight-against-covid-19/). The SOC codes the LMI institute provided are based on the Cybersecurity and Infrastructure Security Agency’s list from April 17, 2020 (https://www.cisa.gov/sites/default/files/publications/Version_3.0_CISA_Guidance_on_Essential_Critical_Infrastructure_Workers_4.pdf). These codes have the advantage of being directly linked to census respondees so that each respondee has one SOC code.
Load Bay Area PUMA-level data. You can see how PUMA (Public Use Microdata Area) level regions compare to tract/blockgroup level regions here (https://tigerweb.geo.census.gov/tigerweb/) Visualization of the PUMAs
# get Bay Area County PUMAS
bay_area_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_pumas <- pumas("CA", cb=F, progress_bar=F) %>%
mutate(county = substr(GEOID10, start = 1, stop =5)) %>%
filter(county %in% c("06085","06081","06075","06041","06097","06055","06095","06013","06001"))
bay_pumas_codes <-
bay_pumas %>% pull(PUMACE10)
projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"
bay_area_county_names <-
c("Alameda","Contra Costa","Marin","Napa","San Francisco","San Mateo","Santa Clara","Solano","Sonoma")
#Water to clean up boundaries of PUMAs
water <-
bay_area_county_names %>%
map_dfr(function(county){ # this is the tidyverse version of a for loop. You can practice writing this the normal for loop way.
area_water("CA", county, progress_bar = FALSE) %>% as.data.frame()
}) %>%
st_as_sf() %>%
st_set_crs(st_crs(counties("CA", cb=F, progress_bar = FALSE)
)) %>%
st_union() %>%
as_Spatial() %>%
sp::disaggregate() %>%
st_as_sf() %>%
st_set_crs(st_crs(counties("CA", cb=F, progress_bar = FALSE)
)) %>%
mutate(area = st_area(.) %>% as.numeric()) %>%
filter(area == max(area)) %>%
dplyr::select(-area)
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
bay_pumas_fixed <-
bay_pumas %>%
st_difference(water)
## although coordinates are longitude/latitude, st_difference assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
mapview(bay_pumas_fixed)
print(str_c("There are ",dim(bay_pumas_fixed)[[1]], " PUMAs in the Bay Area")) #Nrows
## [1] "There are 55 PUMAs in the Bay Area"
data_dir <- "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/"
raw_soc <- readxl::read_xlsx(str_c(data_dir,"SOC-Codes-CISA-Critical-Infrastructure-Workers-with-OES-Data.xlsx"))
soc <- raw_soc %>%
mutate(Critical = replace_na(Critical, FALSE)) %>%
mutate(Critical = replace(Critical, Critical == "X",TRUE)) %>%
mutate(`SOC Occupation` = str_remove(`SOC Occupation`,"-")) %>% #Places SOC code in the format used in PUMS data
mutate(Critical = as.logical(Critical)) %>%
select(`SOC Occupation`,`Occupation Description`,Critical)
#Visualize Breakdown
soc %>%
group_by(Critical) %>%
summarize(SOCs = n()) %>%
mutate(Critical = replace(Critical, Critical == FALSE ,"Nonessential")) %>%
mutate(Critical = replace(Critical, Critical == TRUE ,"Essential")) %>%
ggplot(aes(Critical,SOCs)) + geom_col()
# puma_soc <- read_csv("/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/PUMS/csv_pca/psam_p06.csv")
# colnames(puma_soc)
# bay_puma_soc <- puma_soc %>%
# filter(PUMA %in% bay_pumas_codes) #Filters for just 9 Bay Area Counties
# saveRDS(bay_puma_soc, file = "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/bay_puma_soc.rds")
bay_puma_soc <- readRDS(file = "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/bay_puma_soc.rds") #PUMA = Public Use Microdata Area
soc_essential <- bay_puma_soc %>%
select("PUMA","SOCP") %>%
left_join(soc, by = c("SOCP" = "SOC Occupation")) %>%
mutate(`Occupation Description` = ifelse(is.na(SOCP), replace_na(`Occupation Description`, "Not In Labor Force"),`Occupation Description`)) #Remove those not in the labor force
top_10_visualize_with_na <- soc_essential %>%
group_by(SOCP, `Occupation Description`) %>%
tally() %>%
arrange(desc(n)) %>%
head(n = 10) %>%
ggplot(aes(reorder(SOCP,-n), n))+geom_col()+theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
top_10_visualize_with_na
top_10 <- soc_essential %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
group_by(SOCP, `Occupation Description`,Critical) %>%
tally() %>%
arrange(desc(n)) %>%
head(n = 10)
top_10
## # A tibble: 10 x 4
## # Groups: SOCP, Occupation Description [10]
## SOCP `Occupation Description` Critical n
## <chr> <chr> <lgl> <int>
## 1 1191XX <NA> NA 7881
## 2 151252 Software Developers TRUE 7864
## 3 412031 Retail Salespersons FALSE 4997
## 4 252020 <NA> NA 4786
## 5 412010 <NA> NA 4282
## 6 291141 Registered Nurses TRUE 4078
## 7 132011 Accountants and Auditors FALSE 4032
## 8 436014 Secretaries and Administrative Assistants, Except Lega… FALSE 3859
## 9 434051 Customer Service Representatives TRUE 3532
## 10 411011 First-Line Supervisors of Retail Sales Workers TRUE 3449
top_10_visualize <- top_10 %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
ggplot(aes(reorder(SOCP,-n), n, fill = Critical ))+geom_col()+labs(x = "SOCP Code")+ theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
top_10_visualize
essential_visualize <- soc_essential %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
group_by(Critical) %>%
tally() %>%
ggplot()+geom_col(aes(x = "", y = n , fill = Critical), stat = "identity", position = "fill")
## Warning: Ignoring unknown parameters: stat
essential_visualize
#Using fraction Essential instead of TRUE/FALSE
soc_frac <- soc %>%
mutate(Critical = as.numeric(Critical)) %>%
rename("fracEssential" = Critical)
soc_4_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,4),"XX")) %>%
group_by(`SOC Occupation` , fracEssential) %>%
summarize(count = n()) %>%
spread(fracEssential,count) %>%
mutate(`1` = replace_na(`1`,0)) %>%
mutate(`0` = replace_na(`0`, 0)) %>%
rename("Nonessential" = `0`, "Essential" = `1` ) %>%
mutate(fracEssential = Essential/(Essential+Nonessential)) %>%
select(`SOC Occupation` ,fracEssential)
soc_5_dig <- soc_frac %>%
mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,5),"X")) %>%
group_by(`SOC Occupation` , fracEssential) %>%
summarize(count = n()) %>%
spread(fracEssential,count) %>%
mutate(`1` = replace_na(`1`,0)) %>%
mutate(`0` = replace_na(`0`, 0)) %>%
rename("Nonessential" = `0`, "Essential" = `1` ) %>%
mutate(fracEssential = Essential/(Essential+Nonessential)) %>%
select(`SOC Occupation` ,fracEssential)
soc_frac_adj <- soc_frac %>%
full_join(soc_4_dig) %>%
full_join(soc_5_dig)
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")
soc_essential_frac <- bay_puma_soc %>%
select("PUMA","SOCP") %>%
left_join(soc_frac_adj, by = c("SOCP" = "SOC Occupation")) %>%
mutate(`Occupation Description` = ifelse(is.na(SOCP), replace_na(`Occupation Description`, "Not In Labor Force"),`Occupation Description`)) #Remove those not in the labor force
top_10_visualize_with_na_adj <- soc_essential_frac %>%
group_by(SOCP, `Occupation Description`) %>%
tally() %>%
arrange(desc(n)) %>%
head(n = 10) %>%
ggplot(aes(reorder(SOCP,-n), n))+geom_col()+theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
top_10_visualize_with_na_adj
top_10_visualize_adj <- soc_essential_frac %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
group_by(SOCP, `Occupation Description`, fracEssential) %>%
tally() %>%
arrange(desc(n)) %>%
head(n = 10) %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
ggplot(aes(reorder(SOCP,-n), n, fill = fracEssential ))+geom_col()+labs(x = "SOCP Code")+ theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
top_10_visualize_adj
essential_visualize <- soc_essential_frac %>%
filter(!is.na(SOCP)) %>% #Filter out those not in work force
mutate(fracEssential_present = ifelse(!is.na(fracEssential),TRUE,FALSE)) %>%
group_by(fracEssential_present) %>%
tally() %>%
ggplot()+geom_col(aes(x = "", y = n , fill = fracEssential_present), stat = "identity", position = "fill")
## Warning: Ignoring unknown parameters: stat
essential_visualize